/*
START -> stream

stream
  directive -> line-end -> stream
  indent + line-end -> stream
  [else] -> line-start

line-end
  comment -> line-end
  newline -> .
  input-end -> END

line-start
  doc-start -> doc
  doc-end -> stream
  [else] -> indent -> block-start

block-start
  seq-item-start -> block-start
  explicit-key-start -> block-start
  map-value-start -> block-start
  [else] -> doc

doc
  line-end -> line-start
  spaces -> doc
  anchor -> doc
  tag -> doc
  flow-start -> flow -> doc
  flow-end -> error -> doc
  seq-item-start -> error -> doc
  explicit-key-start -> error -> doc
  map-value-start -> doc
  alias -> doc
  quote-start -> quoted-scalar -> doc
  block-scalar-header -> line-end -> block-scalar(min) -> line-start
  [else] -> plain-scalar(false, min) -> doc

flow
  line-end -> flow
  spaces -> flow
  anchor -> flow
  tag -> flow
  flow-start -> flow -> flow
  flow-end -> .
  seq-item-start -> error -> flow
  explicit-key-start -> flow
  map-value-start -> flow
  alias -> flow
  quote-start -> quoted-scalar -> flow
  comma -> flow
  [else] -> plain-scalar(true, 0) -> flow

quoted-scalar
  quote-end -> .
  [else] -> quoted-scalar

block-scalar(min)
  newline + peek(indent < min) -> .
  [else] -> block-scalar(min)

plain-scalar(is-flow, min)
  scalar-end(is-flow) -> .
  peek(newline + (indent < min)) -> .
  [else] -> plain-scalar(min)
*/

import { BOM, DOCUMENT, FLOW_END, SCALAR } from './cst.ts'

type State =
  | 'stream'
  | 'line-start'
  | 'block-start'
  | 'doc'
  | 'flow'
  | 'quoted-scalar'
  | 'block-scalar'
  | 'plain-scalar'

function isEmpty(ch: string) {
  switch (ch) {
    case undefined:
    case ' ':
    case '\n':
    case '\r':
    case '\t':
      return true
    default:
      return false
  }
}

const hexDigits = new Set('0123456789ABCDEFabcdef')
const tagChars = new Set(
  "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz-#;/?:@&=+$_.!~*'()"
)
const flowIndicatorChars = new Set(',[]{}')
const invalidAnchorChars = new Set(' ,[]{}\n\r\t')
const isNotAnchorChar = (ch: string) => !ch || invalidAnchorChars.has(ch)

/**
 * Splits an input string into lexical tokens, i.e. smaller strings that are
 * easily identifiable by `tokens.tokenType()`.
 *
 * Lexing starts always in a "stream" context. Incomplete input may be buffered
 * until a complete token can be emitted.
 *
 * In addition to slices of the original input, the following control characters
 * may also be emitted:
 *
 * - `\x02` (Start of Text): A document starts with the next token
 * - `\x18` (Cancel): Unexpected end of flow-mode (indicates an error)
 * - `\x1f` (Unit Separator): Next token is a scalar value
 * - `\u{FEFF}` (Byte order mark): Emitted separately outside documents
 */
export class Lexer {
  /**
   * Flag indicating whether the end of the current buffer marks the end of
   * all input
   */
  private atEnd = false

  /**
   * Explicit indent set in block scalar header, as an offset from the current
   * minimum indent, so e.g. set to 1 from a header `|2+`. Set to -1 if not
   * explicitly set.
   */
  private blockScalarIndent = -1

  /**
   * Block scalars that include a + (keep) chomping indicator in their header
   * include trailing empty lines, which are otherwise excluded from the
   * scalar's contents.
   */
  private blockScalarKeep = false

  /** Current input */
  private buffer = ''

  /**
   * Flag noting whether the map value indicator : can immediately follow this
   * node within a flow context.
   */
  private flowKey = false

  /** Count of surrounding flow collection levels. */
  private flowLevel = 0

  /**
   * Minimum level of indentation required for next lines to be parsed as a
   * part of the current scalar value.
   */
  private indentNext = 0

  /** Indentation level of the current line. */
  private indentValue = 0

  /** Position of the next \n character. */
  private lineEndPos: number | null = null

  /** Stores the state of the lexer if reaching the end of incpomplete input */
  private next: State | null = null

  /** A pointer to `buffer`; the current position of the lexer. */
  private pos = 0;

  /**
   * Generate YAML tokens from the `source` string. If `incomplete`,
   * a part of the last line may be left as a buffer for the next call.
   *
   * @returns A generator of lexical tokens
   */
  *lex(source: string, incomplete = false): Generator<string, void> {
    if (source) {
      if (typeof source !== 'string') throw TypeError('source is not a string')
      this.buffer = this.buffer ? this.buffer + source : source
      this.lineEndPos = null
    }
    this.atEnd = !incomplete
    let next: State | null = this.next ?? 'stream'
    while (next && (incomplete || this.hasChars(1)))
      next = yield* this.parseNext(next)
  }

  private atLineEnd() {
    let i = this.pos
    let ch = this.buffer[i]
    while (ch === ' ' || ch === '\t') ch = this.buffer[++i]
    if (!ch || ch === '#' || ch === '\n') return true
    if (ch === '\r') return this.buffer[i + 1] === '\n'
    return false
  }

  private charAt(n: number) {
    return this.buffer[this.pos + n]
  }

  private continueScalar(offset: number) {
    let ch = this.buffer[offset]
    if (this.indentNext > 0) {
      let indent = 0
      while (ch === ' ') ch = this.buffer[++indent + offset]
      if (ch === '\r') {
        const next = this.buffer[indent + offset + 1]
        if (next === '\n' || (!next && !this.atEnd)) return offset + indent + 1
      }
      return ch === '\n' || indent >= this.indentNext || (!ch && !this.atEnd)
        ? offset + indent
        : -1
    }
    if (ch === '-' || ch === '.') {
      const dt = this.buffer.substr(offset, 3)
      if ((dt === '---' || dt === '...') && isEmpty(this.buffer[offset + 3]))
        return -1
    }
    return offset
  }

  private getLine(): string | null {
    let end = this.lineEndPos
    if (typeof end !== 'number' || (end !== -1 && end < this.pos)) {
      end = this.buffer.indexOf('\n', this.pos)
      this.lineEndPos = end
    }
    if (end === -1) return this.atEnd ? this.buffer.substring(this.pos) : null
    if (this.buffer[end - 1] === '\r') end -= 1
    return this.buffer.substring(this.pos, end)
  }

  private hasChars(n: number) {
    return this.pos + n <= this.buffer.length
  }

  private setNext(state: State) {
    this.buffer = this.buffer.substring(this.pos)
    this.pos = 0
    this.lineEndPos = null
    this.next = state
    return null
  }

  private peek(n: number) {
    return this.buffer.substr(this.pos, n)
  }

  private *parseNext(next: State) {
    switch (next) {
      case 'stream':
        return yield* this.parseStream()
      case 'line-start':
        return yield* this.parseLineStart()
      case 'block-start':
        return yield* this.parseBlockStart()
      case 'doc':
        return yield* this.parseDocument()
      case 'flow':
        return yield* this.parseFlowCollection()
      case 'quoted-scalar':
        return yield* this.parseQuotedScalar()
      case 'block-scalar':
        return yield* this.parseBlockScalar()
      case 'plain-scalar':
        return yield* this.parsePlainScalar()
    }
  }

  private *parseStream() {
    let line = this.getLine()
    if (line === null) return this.setNext('stream')
    if (line[0] === BOM) {
      yield* this.pushCount(1)
      line = line.substring(1)
    }
    if (line[0] === '%') {
      let dirEnd = line.length
      let cs = line.indexOf('#')
      while (cs !== -1) {
        const ch = line[cs - 1]
        if (ch === ' ' || ch === '\t') {
          dirEnd = cs - 1
          break
        } else {
          cs = line.indexOf('#', cs + 1)
        }
      }
      while (true) {
        const ch = line[dirEnd - 1]
        if (ch === ' ' || ch === '\t') dirEnd -= 1
        else break
      }
      const n = (yield* this.pushCount(dirEnd)) + (yield* this.pushSpaces(true))
      yield* this.pushCount(line.length - n) // possible comment
      this.pushNewline()
      return 'stream'
    }
    if (this.atLineEnd()) {
      const sp = yield* this.pushSpaces(true)
      yield* this.pushCount(line.length - sp)
      yield* this.pushNewline()
      return 'stream'
    }
    yield DOCUMENT
    return yield* this.parseLineStart()
  }

  private *parseLineStart() {
    const ch = this.charAt(0)
    if (!ch && !this.atEnd) return this.setNext('line-start')
    if (ch === '-' || ch === '.') {
      if (!this.atEnd && !this.hasChars(4)) return this.setNext('line-start')
      const s = this.peek(3)
      if ((s === '---' || s === '...') && isEmpty(this.charAt(3))) {
        yield* this.pushCount(3)
        this.indentValue = 0
        this.indentNext = 0
        return s === '---' ? 'doc' : 'stream'
      }
    }
    this.indentValue = yield* this.pushSpaces(false)
    if (this.indentNext > this.indentValue && !isEmpty(this.charAt(1)))
      this.indentNext = this.indentValue
    return yield* this.parseBlockStart()
  }

  private *parseBlockStart(): Generator<string, 'doc' | null> {
    const [ch0, ch1] = this.peek(2)
    if (!ch1 && !this.atEnd) return this.setNext('block-start')
    if ((ch0 === '-' || ch0 === '?' || ch0 === ':') && isEmpty(ch1)) {
      const n = (yield* this.pushCount(1)) + (yield* this.pushSpaces(true))
      this.indentNext = this.indentValue + 1
      this.indentValue += n
      return yield* this.parseBlockStart()
    }
    return 'doc'
  }

  private *parseDocument() {
    yield* this.pushSpaces(true)
    const line = this.getLine()
    if (line === null) return this.setNext('doc')
    let n = yield* this.pushIndicators()
    switch (line[n]) {
      case '#':
        yield* this.pushCount(line.length - n)
      // fallthrough
      case undefined:
        yield* this.pushNewline()
        return yield* this.parseLineStart()
      case '{':
      case '[':
        yield* this.pushCount(1)
        this.flowKey = false
        this.flowLevel = 1
        return 'flow'
      case '}':
      case ']':
        // this is an error
        yield* this.pushCount(1)
        return 'doc'
      case '*':
        yield* this.pushUntil(isNotAnchorChar)
        return 'doc'
      case '"':
      case "'":
        return yield* this.parseQuotedScalar()
      case '|':
      case '>':
        n += yield* this.parseBlockScalarHeader()
        n += yield* this.pushSpaces(true)
        yield* this.pushCount(line.length - n)
        yield* this.pushNewline()
        return yield* this.parseBlockScalar()
      default:
        return yield* this.parsePlainScalar()
    }
  }

  private *parseFlowCollection() {
    let nl: number, sp: number
    let indent = -1
    do {
      nl = yield* this.pushNewline()
      if (nl > 0) {
        sp = yield* this.pushSpaces(false)
        this.indentValue = indent = sp
      } else {
        sp = 0
      }
      sp += yield* this.pushSpaces(true)
    } while (nl + sp > 0)
    const line = this.getLine()
    if (line === null) return this.setNext('flow')
    if (
      (indent !== -1 && indent < this.indentNext && line[0] !== '#') ||
      (indent === 0 &&
        (line.startsWith('---') || line.startsWith('...')) &&
        isEmpty(line[3]))
    ) {
      // Allowing for the terminal ] or } at the same (rather than greater)
      // indent level as the initial [ or { is technically invalid, but
      // failing here would be surprising to users.
      const atFlowEndMarker =
        indent === this.indentNext - 1 &&
        this.flowLevel === 1 &&
        (line[0] === ']' || line[0] === '}')
      if (!atFlowEndMarker) {
        // this is an error
        this.flowLevel = 0
        yield FLOW_END
        return yield* this.parseLineStart()
      }
    }
    let n = 0
    while (line[n] === ',') {
      n += yield* this.pushCount(1)
      n += yield* this.pushSpaces(true)
      this.flowKey = false
    }
    n += yield* this.pushIndicators()
    switch (line[n]) {
      case undefined:
        return 'flow'
      case '#':
        yield* this.pushCount(line.length - n)
        return 'flow'
      case '{':
      case '[':
        yield* this.pushCount(1)
        this.flowKey = false
        this.flowLevel += 1
        return 'flow'
      case '}':
      case ']':
        yield* this.pushCount(1)
        this.flowKey = true
        this.flowLevel -= 1
        return this.flowLevel ? 'flow' : 'doc'
      case '*':
        yield* this.pushUntil(isNotAnchorChar)
        return 'flow'
      case '"':
      case "'":
        this.flowKey = true
        return yield* this.parseQuotedScalar()
      case ':': {
        const next = this.charAt(1)
        if (this.flowKey || isEmpty(next) || next === ',') {
          this.flowKey = false
          yield* this.pushCount(1)
          yield* this.pushSpaces(true)
          return 'flow'
        }
      }
      // fallthrough
      default:
        this.flowKey = false
        return yield* this.parsePlainScalar()
    }
  }

  private *parseQuotedScalar() {
    const quote = this.charAt(0)
    let end = this.buffer.indexOf(quote, this.pos + 1)
    if (quote === "'") {
      while (end !== -1 && this.buffer[end + 1] === "'")
        end = this.buffer.indexOf("'", end + 2)
    } else {
      // double-quote
      while (end !== -1) {
        let n = 0
        while (this.buffer[end - 1 - n] === '\\') n += 1
        if (n % 2 === 0) break
        end = this.buffer.indexOf('"', end + 1)
      }
    }
    // Only looking for newlines within the quotes
    const qb = this.buffer.substring(0, end)
    let nl = qb.indexOf('\n', this.pos)
    if (nl !== -1) {
      while (nl !== -1) {
        const cs = this.continueScalar(nl + 1)
        if (cs === -1) break
        nl = qb.indexOf('\n', cs)
      }
      if (nl !== -1) {
        // this is an error caused by an unexpected unindent
        end = nl - (qb[nl - 1] === '\r' ? 2 : 1)
      }
    }
    if (end === -1) {
      if (!this.atEnd) return this.setNext('quoted-scalar')
      end = this.buffer.length
    }
    yield* this.pushToIndex(end + 1, false)
    return this.flowLevel ? 'flow' : 'doc'
  }

  private *parseBlockScalarHeader() {
    this.blockScalarIndent = -1
    this.blockScalarKeep = false
    let i = this.pos
    while (true) {
      const ch = this.buffer[++i]
      if (ch === '+') this.blockScalarKeep = true
      else if (ch > '0' && ch <= '9') this.blockScalarIndent = Number(ch) - 1
      else if (ch !== '-') break
    }
    return yield* this.pushUntil(ch => isEmpty(ch) || ch === '#')
  }

  private *parseBlockScalar() {
    let nl = this.pos - 1 // may be -1 if this.pos === 0
    let indent = 0
    let ch: string
    loop: for (let i = this.pos; (ch = this.buffer[i]); ++i) {
      switch (ch) {
        case ' ':
          indent += 1
          break
        case '\n':
          nl = i
          indent = 0
          break
        case '\r': {
          const next = this.buffer[i + 1]
          if (!next && !this.atEnd) return this.setNext('block-scalar')
          if (next === '\n') break
        } // fallthrough
        default:
          break loop
      }
    }
    if (!ch && !this.atEnd) return this.setNext('block-scalar')
    if (indent >= this.indentNext) {
      if (this.blockScalarIndent === -1) this.indentNext = indent
      else {
        this.indentNext =
          this.blockScalarIndent + (this.indentNext === 0 ? 1 : this.indentNext)
      }
      do {
        const cs = this.continueScalar(nl + 1)
        if (cs === -1) break
        nl = this.buffer.indexOf('\n', cs)
      } while (nl !== -1)
      if (nl === -1) {
        if (!this.atEnd) return this.setNext('block-scalar')
        nl = this.buffer.length
      }
    }

    // Trailing insufficiently indented tabs are invalid.
    // To catch that during parsing, we include them in the block scalar value.
    let i = nl + 1
    ch = this.buffer[i]
    while (ch === ' ') ch = this.buffer[++i]
    if (ch === '\t') {
      while (ch === '\t' || ch === ' ' || ch === '\r' || ch === '\n')
        ch = this.buffer[++i]
      nl = i - 1
    } else if (!this.blockScalarKeep) {
      do {
        let i = nl - 1
        let ch = this.buffer[i]
        if (ch === '\r') ch = this.buffer[--i]
        const lastChar = i // Drop the line if last char not more indented
        while (ch === ' ') ch = this.buffer[--i]
        if (ch === '\n' && i >= this.pos && i + 1 + indent > lastChar) nl = i
        else break
      } while (true)
    }
    yield SCALAR
    yield* this.pushToIndex(nl + 1, true)
    return yield* this.parseLineStart()
  }

  private *parsePlainScalar() {
    const inFlow = this.flowLevel > 0
    let end = this.pos - 1
    let i = this.pos - 1
    let ch: string
    while ((ch = this.buffer[++i])) {
      if (ch === ':') {
        const next = this.buffer[i + 1]
        if (isEmpty(next) || (inFlow && flowIndicatorChars.has(next))) break
        end = i
      } else if (isEmpty(ch)) {
        let next = this.buffer[i + 1]
        if (ch === '\r') {
          if (next === '\n') {
            i += 1
            ch = '\n'
            next = this.buffer[i + 1]
          } else end = i
        }
        if (next === '#' || (inFlow && flowIndicatorChars.has(next))) break
        if (ch === '\n') {
          const cs = this.continueScalar(i + 1)
          if (cs === -1) break
          i = Math.max(i, cs - 2) // to advance, but still account for ' #'
        }
      } else {
        if (inFlow && flowIndicatorChars.has(ch)) break
        end = i
      }
    }
    if (!ch && !this.atEnd) return this.setNext('plain-scalar')
    yield SCALAR
    yield* this.pushToIndex(end + 1, true)
    return inFlow ? 'flow' : 'doc'
  }

  private *pushCount(n: number) {
    if (n > 0) {
      yield this.buffer.substr(this.pos, n)
      this.pos += n
      return n
    }
    return 0
  }

  private *pushToIndex(i: number, allowEmpty: boolean) {
    const s = this.buffer.slice(this.pos, i)
    if (s) {
      yield s
      this.pos += s.length
      return s.length
    } else if (allowEmpty) yield ''
    return 0
  }

  private *pushIndicators(): Generator<string, number> {
    switch (this.charAt(0)) {
      case '!':
        return (
          (yield* this.pushTag()) +
          (yield* this.pushSpaces(true)) +
          (yield* this.pushIndicators())
        )
      case '&':
        return (
          (yield* this.pushUntil(isNotAnchorChar)) +
          (yield* this.pushSpaces(true)) +
          (yield* this.pushIndicators())
        )
      case '-': // this is an error
      case '?': // this is an error outside flow collections
      case ':': {
        const inFlow = this.flowLevel > 0
        const ch1 = this.charAt(1)
        if (isEmpty(ch1) || (inFlow && flowIndicatorChars.has(ch1))) {
          if (!inFlow) this.indentNext = this.indentValue + 1
          else if (this.flowKey) this.flowKey = false
          return (
            (yield* this.pushCount(1)) +
            (yield* this.pushSpaces(true)) +
            (yield* this.pushIndicators())
          )
        }
      }
    }
    return 0
  }

  private *pushTag() {
    if (this.charAt(1) === '<') {
      let i = this.pos + 2
      let ch = this.buffer[i]
      while (!isEmpty(ch) && ch !== '>') ch = this.buffer[++i]
      return yield* this.pushToIndex(ch === '>' ? i + 1 : i, false)
    } else {
      let i = this.pos + 1
      let ch = this.buffer[i]
      while (ch) {
        if (tagChars.has(ch)) ch = this.buffer[++i]
        else if (
          ch === '%' &&
          hexDigits.has(this.buffer[i + 1]) &&
          hexDigits.has(this.buffer[i + 2])
        ) {
          ch = this.buffer[(i += 3)]
        } else break
      }
      return yield* this.pushToIndex(i, false)
    }
  }

  private *pushNewline() {
    const ch = this.buffer[this.pos]
    if (ch === '\n') return yield* this.pushCount(1)
    else if (ch === '\r' && this.charAt(1) === '\n')
      return yield* this.pushCount(2)
    else return 0
  }

  private *pushSpaces(allowTabs: boolean) {
    let i = this.pos - 1
    let ch: string
    do {
      ch = this.buffer[++i]
    } while (ch === ' ' || (allowTabs && ch === '\t'))
    const n = i - this.pos
    if (n > 0) {
      yield this.buffer.substr(this.pos, n)
      this.pos = i
    }
    return n
  }

  private *pushUntil(test: (ch: string) => boolean) {
    let i = this.pos
    let ch = this.buffer[i]
    while (!test(ch)) ch = this.buffer[++i]
    return yield* this.pushToIndex(i, false)
  }
}
